This document will show you how to create aggregate/contingency tables, maps, and other visualizations using 2009 public use NHTS Data. (Download here: http://nhts.ornl.gov/download.shtml)

# load packages
library(data.table)
library(htmlTable)
library(magrittr) # Not required for analysis, but function chaining helps readability.

# source code
source('make_table.R')
source('make_bar_chart.R')
source('make_html_table.R')
source('cross_tab_table.R')
source('util.R')

Reading and Merging the Data

The first step is to read in the data. Due to the size of the csvs, we will read in only necessary columns using data.table’s fread function. We will then merge the weights with the survey data using unique identifiers.

# Read variable metadata table
variables <- fread("./data/2009/variables.csv")
# Read variable value lookup table 
labels <- fread("./data/2009/labels.csv")


# There are 100 replicate weight names. Use get_wgt_names to create the vector of names.
wgt_names <- get_wgt_names('DAYWGT') #Using trip weights in this example.

# Read in the datasets containing the variables/keys necessary to perform analysis
per50wt <- fread(
  input = './data/2009/per50wt.csv', 
  select = c('HOUSEID','PERSONID',wgt_names),
  showProgress = F,
  key = c('HOUSEID','PERSONID')
)
DAYV2PUB <- fread(
  input = './data/2009/DAYV2PUB.csv',
  select = c('HOUSEID','PERSONID','FLAG100','HHSIZE','WRKCOUNT','HHVEHCNT','HHSTATE','HHFAMINC','VMT_MILE','TRAVDAY','TRPTRANS','TRIPPURP','STRTTIME','WHYTRP1S'),
  showProgress = F,
  key = c('HOUSEID','PERSONID')
)

# merge the datasets
dt <- DAYV2PUB[per50wt]

invisible(gc()) #garbage collection to help clean memory

Creating Aggregate Tables

We will use the make_table function to create weighted aggregate tables. Frequency, Sum, and Average are currently supported.

1. Number of Trips by Travel Mode

ex1 <- make_table(
  data = dt, 
  agg = 'count',
  factors = 'TRPTRANS',
  wgt_name = 'DAYWGT'
)

ex1 %>%
  use_labels(labels) %>% 
  cross_tab_table(round_digits = 3) %>% 
  make_html_table
DAYWGT se
TRPTRANS
        Airplane 3.11E+08 2.79E+07
        Amtrak/inter city train 9.53E+07 2.06E+07
        Appropriate skip 8.98E+07 2.39E+07
        Bicycle 4.08E+09 1.81E+08
        Car 1.74E+11 1.57E+09
        Charter/tour bus 2.41E+08 4.74E+07
        City to city bus 5.99E+07 5.39E+07
        Commuter bus 6.66E+08 8.61E+07
        Commuter train 5.61E+08 6.66E+07
        Dont know 4.36E+08 9.06E+07
        Ferry 3.99E+07 9.65E+06
        Light electric veh (golf cart) 1.83E+08 4.82E+07
        Local public bus 4.69E+09 2.13E+08
        Motorcycle 1.05E+09 8.70E+07
        Not ascertained 3.08E+07 1.52E+07
        Other 2.31E+09 1.67E+08
        Other truck 1.91E+09 2.82E+08
        Pickup truck 4.04E+10 7.49E+08
        Refused 1.73E+08 3.17E+07
        RV 1.03E+08 3.66E+07
        School bus 6.71E+09 2.24E+08
        Shuttle bus 6.51E+08 1.07E+08
        Special transit-people w/disabilities 2.74E+08 3.70E+07
        Street car/trolley 6.81E+07 1.64E+07
        Subway/elevated train 1.54E+09 1.26E+08
        SUV 6.93E+10 1.14E+09
        Taxicab 7.38E+08 7.78E+07
        Van 4.06E+10 8.95E+08
        Walk 4.10E+10 7.53E+08
invisible(gc())

2. Average Driver Vehicle Miles by Trip Purpose Summary

dt[, WRKCOUNT := as.character(WRKCOUNT)][as.numeric(WRKCOUNT) >= 4, WRKCOUNT := '4+']

ex2 <- make_table(
  data = dt, 
  agg = 'avg',
  agg_var = 'VMT_MILE',
  factors = 'WHYTRP1S',
  wgt_name = 'DAYWGT',
  subset = 'VMT_MILE >= 0 & WHYTRP1S >= 0',
  variance = 'moe' #Option to get Margin of Error, instead of default Standard Error
)

ex2 %>%
  use_labels(labels) %>% 
  make_bar_chart
invisible(gc())

3. Average Person Trip Rate by Travel Day, Household Size and Number of Workers

# Put a ceiling on HHSIZE and WRKCOUNT variables
dt[, HHSIZE := as.character(HHSIZE)][as.numeric(HHSIZE) >= 4, HHSIZE := '4+']
dt[, WRKCOUNT := as.character(WRKCOUNT)][as.numeric(WRKCOUNT) >= 4, WRKCOUNT := '4+']

ex3 <- make_table(
  data = dt, 
  agg = 'person_trip_rate',
  factors = c('TRAVDAY','HHSIZE','WRKCOUNT'),
  wgt_name = 'DAYWGT',
  subset = 'FLAG100 == 1'
)

# Create formatted "report-type" html table
ex3 %>%
  use_labels(labels) %>% 
  cross_tab_table(round_digits = 3) %>% 
  make_html_table
1   2   3   4+
TRAVDAY
by
WRKCOUNT
DAYWGT se   DAYWGT se   DAYWGT se   DAYWGT se
Friday
        0 4.467 0.141   4.335 0.102   4.007 0.3   3.676 0.195
        1 4.982 0.164   4.701 0.084   4.51 0.163   4.546 0.181
        2 0 0   4.803 0.11   4.588 0.12   4.939 0.13
        3 0 0   0 0   4.345 0.196   4.566 0.14
        4+ 0 0   0 0   0 0   4.736 0.323
Monday
        0 4.073 0.142   4.235 0.091   4.367 0.313   3.944 0.157
        1 4.652 0.166   4.414 0.136   4.055 0.128   4.458 0.148
        2 0 0   4.477 0.119   4.156 0.127   4.096 0.103
        3 0 0   0 0   4.134 0.199   4.223 0.175
        4+ 0 0   0 0   0 0   4.492 0.273
Saturday
        0 3.952 0.153   4.126 0.095   4.068 0.282   3.782 0.32
        1 5.117 0.172   4.711 0.144   4.548 0.259   4.379 0.185
        2 0 0   4.841 0.128   4.654 0.156   4.619 0.145
        3 0 0   0 0   4.062 0.197   4.377 0.226
        4+ 0 0   0 0   0 0   4.443 0.238
Sunday
        0 3.774 0.124   3.872 0.136   3.162 0.291   3.357 0.304
        1 4.492 0.122   4.006 0.144   3.875 0.166   4.194 0.134
        2 0 0   4.222 0.107   3.976 0.139   3.825 0.118
        3 0 0   0 0   4.105 0.187   4.175 0.301
        4+ 0 0   0 0   0 0   3.909 0.203
Thursday
        0 4.344 0.224   4.331 0.124   4.117 0.592   3.761 0.223
        1 4.611 0.16   4.429 0.114   4.261 0.167   4.677 0.164
        2 0 0   4.388 0.121   4.485 0.146   4.297 0.101
        3 0 0   0 0   3.932 0.241   3.965 0.117
        4+ 0 0   0 0   0 0   4.039 0.145
Tuesday
        0 4.375 0.154   4.425 0.138   5.031 0.243   4.204 0.299
        1 4.472 0.165   4.615 0.111   4.265 0.096   4.732 0.161
        2 0 0   4.361 0.136   4.423 0.111   4.401 0.122
        3 0 0   0 0   4.408 0.16   4.273 0.141
        4+ 0 0   0 0   0 0   3.897 0.18
Wednesday
        0 4.493 0.156   4.495 0.105   3.972 0.217   3.789 0.243
        1 4.555 0.124   4.282 0.125   4.171 0.156   4.277 0.12
        2 0 0   4.436 0.097   4.565 0.136   4.322 0.111
        3 0 0   0 0   4.024 0.28   4.14 0.151
        4+ 0 0   0 0   0 0   4.572 0.306
invisible(gc())

Creating Interactive Maps

source('load_map_layer.R')
source('map(functions).R')

tbl1 <- make_table(
  data = dt, 
  agg = 'person_trip_rate',
  factors = 'HHSTATE',
  wgt_name = 'DAYWGT',
  subset = 'FLAG100 == 1 & WHYTRP1S >= 0'
)

tbl2 <- make_table(
  data = dt, 
  agg = 'person_trip_rate',
  factors = c('HHSTATE','WHYTRP1S'),
  wgt_name = 'DAYWGT',
  subset = 'FLAG100 == 1 & WHYTRP1S >= 0'
)

tbl2 <- use_labels(tbl2, labels, keep = 'WHYTRP1S')

#list of attributes to copy for each factor split
attr_names <- c('response','factors','variance','factors_label','response_labels')

#split the data.table by HHSTATE and create svg for plot of each state
gg_html <- sapply(with(tbl2, split(tbl2, HHSTATE)), function(tbl) {
  
  for(i in attr_names) setattr(tbl, i, attr(tbl2, i))
  g <- make_bar_chart(tbl, interactive = F)
  gsub("'", "\"", g$x$html) #remove single quotes from html string
  
})

#Create column of state names for merging
gg_html <- cbind(STUSPS = attr(gg_html, "names"), tooltip = unlist(gg_html))

#load geom_state_layer
load_map_layer('state_layer')

#Merge tbl1 with state_layer and then merge the tooltip
merged <- merge(x = state_layer, y = tbl1, by.x = 'STUSPS', by.y = 'HHSTATE')
merged <- merge(merged, gg_html, by = 'STUSPS')

geom_state_layer <- geom_polygon_interactive(
  data = merged,
  mapping = aes(
    x = long,
    y = lat,
    group = group,
    tooltip = paste0(NAME,'\n',tooltip),
    fill = DAYWGT,
    use_jquery = TRUE,
    data_id = STATEFP,
    hover_css = "fill:orange"),
  color = "#FFFFFF",
  size = 0.35)

#Create map
map <- ggplot() + 
  geom_state_layer + 
  coord_fixed() + 
  theme_void() + 
  scale_fill_gradient(low = "#f2f0f7", high = "#54278f")

tooltip_css <- "background-color:#F2F2F2;padding:10px;border-radius:10px 20px 10px 20px;"

invisible(gc())

3. Average person trip rate by state (Hover to see state breakdown of Average person trip rate by Trip Purpose Summary)

map_widget <- ggiraph(code = {print(map)}, zoom_max = 12, tooltip_extra_css  = tooltip_css)
map_widget
invisible(gc())